LOADING DATA

library(SummarizedExperiment)
## Loading required package: GenomicRanges
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist, unsplit
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(limma)
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library(edgeR)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:IRanges':
## 
##     collapse
## This is mgcv 1.8-12. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## 
## Attaching package: 'genefilter'
## The following object is masked from 'package:base':
## 
##     anyNA
library(ggplot2)
library(geneplotter)
## Loading required package: lattice
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: XML

We load the raw RNA-seq data counts set of Kidney renal clear-cell-carcinoma.

se <- readRDS(file.path("~/Documentos/MASTER/IEO/KIDNEY/seKIRC.rds"))

se
## class: RangedSummarizedExperiment 
## dim: 20115 614 
## metadata(5): experimentData annotation cancerTypeCode
##   cancerTypeDescription objectCreationDate
## assays(1): counts
## rownames(20115): 1 2 ... 102724473 103091865
## rowRanges metadata column names(3): symbol txlen txgc
## colnames(614): TCGA.3Z.A93Z.01A.11R.A37O.07
##   TCGA.6D.AA2E.01A.11R.A37O.07 ... TCGA.CZ.5988.11A.01R.1672.07
##   TCGA.CZ.5989.11A.01R.1672.07
## colData names(549): type bcr_patient_uuid ...
##   lymph_nodes_aortic_pos_by_ihc lymph_nodes_aortic_pos_total

QUALITY & NORMALIZATION

DGE object

We create a DGEList’ object to hold the dataset to be analysed in a better and more comprehensive way.

library(edgeR)
dge <- DGEList(counts=assays(se)$counts, genes=mcols(se))
## Warning in as.data.frame.DataFrame(genes, stringsAsFactors = FALSE):
## Arguments in '...' ignored

Moreover, we calculate \(\log_2\) CPM values of expression and we save them in an assay element.

assays(se)$logCPM <- cpm(dge, log=TRUE, prior.count=0.5)

Library size filtering

At this point, we need to determine if the library sizes of tumor samples and normal samples are similar or not.

libsize <- data.frame(libsize = dge$sample$lib.size/1e6, type = se$type)

summary(libsize)
##     libsize            type    
##  Min.   :  3.049   normal: 72  
##  1st Qu.: 38.075   tumor :542  
##  Median : 46.715               
##  Mean   : 46.679               
##  3rd Qu.: 54.647               
##  Max.   :115.546
ggplot(libsize) +
  geom_density(aes(x=libsize, fill=type), alpha=0.5) +
  ylab("density\n") + xlab("\nMillions of Reads") +
  theme_bw()

ggplot(libsize) +
  geom_histogram(
    aes(x=libsize, fill=type, y = (..count..)/sum(..count..)), alpha=0.5, binwidth=2
  ) + xlab("\nMillions of Reads") + ylab("% of Samples\n") + theme_bw()

dge.filt <- dge[, dge$sample$lib.size/1e6 > 45 ]

final.samples <- rownames(dge.filt$samples)

se.filt <-se[,colnames(se) %in% final.samples]
se.normal <- se.filt[,se.filt$type == "normal"]
se.tumor  <- se.filt[,se.filt$type == "tumor"]


normal.code <- substr(colnames(se.normal), 9, 12)
tumor.code <- substr(colnames(se.tumor), 9, 12)

common.codes <- intersect(normal.code, tumor.code)
length(common.codes)
## [1] 38
se.paired <- se.filt[,substr(colnames(se.filt), 9, 12) %in% common.codes]
summary(se.paired$type)
## normal  tumor 
##     38     38
colData(se.paired)$samplecodes <- substr(colnames(se.paired), 9, 12)
dge.filt <- DGEList(counts=assays(se.paired)$counts, genes=mcols(se.paired))
## Warning in as.data.frame.DataFrame(genes, stringsAsFactors = FALSE):
## Arguments in '...' ignored
dge.filt
## An object of class "DGEList"
## $counts
##    TCGA.B0.4712.01A.01R.1503.07 TCGA.B0.5402.01A.01R.1503.07
## 1                           171                           52
## 2                         72089                       254994
## 9                             0                            0
## 10                            0                            0
## 12                         1488                         1894
##    TCGA.B0.5690.01A.11R.1541.07 TCGA.B0.5705.01A.11R.1541.07
## 1                           129                           91
## 2                        214306                       141888
## 9                             0                            0
## 10                            0                            0
## 12                         1949                         2848
##    TCGA.B0.5709.01A.11R.1541.07 TCGA.B0.5711.01A.11R.1672.07
## 1                           182                           43
## 2                        212776                       193205
## 9                             0                            0
## 10                            0                            0
## 12                         2491                         2067
##    TCGA.B0.5712.01A.11R.1672.07 TCGA.B2.5636.01A.02R.1541.07
## 1                            15                          221
## 2                         58923                       162102
## 9                             0                            0
## 10                            0                            0
## 12                         1731                         2464
##    TCGA.B8.5549.01A.01R.1541.07 TCGA.B8.5552.01B.11R.1672.07
## 1                            67                          124
## 2                        148006                       250803
## 9                             0                            0
## 10                            0                            0
## 12                         1593                         2740
##    TCGA.CJ.5680.01A.11R.1541.07 TCGA.CJ.6030.01A.11R.1672.07
## 1                            82                           88
## 2                         74680                       119037
## 9                             0                            0
## 10                            0                            0
## 12                         2291                         1876
##    TCGA.CW.5580.01A.01R.1672.07 TCGA.CW.5585.01A.01R.1541.07
## 1                            73                           55
## 2                        214475                       137990
## 9                             0                            0
## 10                            0                            0
## 12                         3628                         2699
##    TCGA.CW.5587.01A.01R.1541.07 TCGA.CW.5589.01A.01R.1541.07
## 1                           283                          104
## 2                        147003                       161231
## 9                             0                            0
## 10                            0                            0
## 12                         2481                         1940
##    TCGA.CW.5591.01A.01R.1541.07 TCGA.CW.6087.01A.11R.1672.07
## 1                            48                          254
## 2                        141961                        44768
## 9                             0                            0
## 10                            0                            0
## 12                         1799                         2399
##    TCGA.CW.6088.01A.11R.1672.07 TCGA.CW.6090.01A.11R.1672.07
## 1                            36                           91
## 2                        200085                       120305
## 9                             0                            0
## 10                            0                            0
## 12                         3484                         1952
##    TCGA.CZ.4863.01A.01R.1503.07 TCGA.CZ.4864.01A.01R.1503.07
## 1                            97                          232
## 2                         80757                       121483
## 9                             0                            0
## 10                            0                            0
## 12                         2495                         3438
##    TCGA.CZ.4865.01A.02R.1503.07 TCGA.CZ.5452.01A.01R.1503.07
## 1                           141                          192
## 2                        196075                        90934
## 9                             0                            0
## 10                            0                            0
## 12                         2491                         1814
##    TCGA.CZ.5453.01A.01R.1503.07 TCGA.CZ.5454.01A.01R.1503.07
## 1                            31                          130
## 2                        122695                       190754
## 9                             0                            0
## 10                            0                            0
## 12                         1631                         1745
##    TCGA.CZ.5455.01A.01R.1503.07 TCGA.CZ.5457.01A.01R.1503.07
## 1                            43                          427
## 2                        277893                       130344
## 9                             0                            0
## 10                            0                            0
## 12                         2907                         1429
##    TCGA.CZ.5458.01A.01R.1503.07 TCGA.CZ.5461.01A.01R.1503.07
## 1                           121                          128
## 2                        210354                       183064
## 9                             0                            0
## 10                            0                            0
## 12                         2061                         2368
##    TCGA.CZ.5462.01A.01R.1503.07 TCGA.CZ.5463.01A.01R.1503.07
## 1                            47                           46
## 2                         53290                       160439
## 9                             0                            0
## 10                            0                            0
## 12                         2032                         3366
##    TCGA.CZ.5465.01A.01R.1503.07 TCGA.CZ.5467.01A.01R.1503.07
## 1                            60                           94
## 2                        151711                       187303
## 9                             0                            0
## 10                            0                            0
## 12                         3780                         2391
##    TCGA.CZ.5468.01A.01R.1503.07 TCGA.CZ.5469.01A.01R.1503.07
## 1                           315                          441
## 2                         84264                        48436
## 9                             0                            0
## 10                            0                            0
## 12                         1944                         2420
##    TCGA.CZ.5470.01A.01R.1503.07 TCGA.CZ.5984.01A.11R.1672.07
## 1                           161                          346
## 2                        101134                       107521
## 9                             0                            0
## 10                            0                            0
## 12                         1860                         2169
##    TCGA.B0.4712.11A.02R.1503.07 TCGA.B0.5402.11A.01R.1503.07
## 1                           125                          129
## 2                         53444                       212158
## 9                             0                            0
## 10                            0                            0
## 12                         2611                         1831
##    TCGA.B0.5690.11A.01R.1541.07 TCGA.B0.5705.11A.01R.1541.07
## 1                            48                           75
## 2                         44253                        94956
## 9                             0                            0
## 10                            0                            0
## 12                         2159                         2375
##    TCGA.B0.5709.11A.01R.1541.07 TCGA.B0.5711.11A.01R.1672.07
## 1                           100                           98
## 2                         61381                        81181
## 9                             0                            0
## 10                            0                            0
## 12                         2074                         2678
##    TCGA.B0.5712.11A.01R.1672.07 TCGA.B2.5636.11A.01R.1541.07
## 1                            94                           63
## 2                        165423                        88791
## 9                             0                            0
## 10                            0                            0
## 12                         1845                         2033
##    TCGA.B8.5549.11A.01R.1541.07 TCGA.B8.5552.11A.01R.1672.07
## 1                            62                           66
## 2                         68264                       122109
## 9                             0                            0
## 10                            0                            0
## 12                         2297                         1671
##    TCGA.CJ.5680.11A.01R.1541.07 TCGA.CJ.6030.11A.01R.1672.07
## 1                            71                           61
## 2                         85768                        66273
## 9                             0                            0
## 10                            0                            0
## 12                         2094                         1644
##    TCGA.CW.5580.11A.02R.1672.07 TCGA.CW.5585.11A.01R.1541.07
## 1                           139                           75
## 2                        162520                       111561
## 9                             0                            0
## 10                            0                            0
## 12                         2849                         2009
##    TCGA.CW.5587.11A.01R.1541.07 TCGA.CW.5589.11A.01R.1541.07
## 1                            64                           53
## 2                         57992                        58145
## 9                             0                            0
## 10                            0                            0
## 12                         2005                         1909
##    TCGA.CW.5591.11A.01R.1541.07 TCGA.CW.6087.11A.01R.1672.07
## 1                            71                           64
## 2                        268276                        50936
## 9                             0                            0
## 10                            0                            0
## 12                         3544                         2215
##    TCGA.CW.6088.11A.01R.1672.07 TCGA.CW.6090.11A.01R.1672.07
## 1                            37                           52
## 2                        124218                        54273
## 9                             0                            0
## 10                            0                            0
## 12                         3581                         2060
##    TCGA.CZ.4863.11A.01R.1503.07 TCGA.CZ.4864.11A.01R.1503.07
## 1                           142                           43
## 2                        100202                       100861
## 9                             0                            0
## 10                            0                            0
## 12                         2499                         2015
##    TCGA.CZ.4865.11A.01R.1503.07 TCGA.CZ.5452.11A.01R.1503.07
## 1                            87                           81
## 2                        133636                        57375
## 9                             0                            0
## 10                            0                            0
## 12                         2079                         2036
##    TCGA.CZ.5453.11A.01R.1503.07 TCGA.CZ.5454.11A.01R.1503.07
## 1                            63                          124
## 2                        129106                       160618
## 9                             0                            0
## 10                            0                            1
## 12                         2225                         3201
##    TCGA.CZ.5455.11A.01R.1503.07 TCGA.CZ.5457.11A.01R.1503.07
## 1                            65                          165
## 2                        132123                        91025
## 9                             0                            0
## 10                            0                            0
## 12                         2145                         2274
##    TCGA.CZ.5458.11A.01R.1503.07 TCGA.CZ.5461.11A.01R.1503.07
## 1                           105                           78
## 2                        125882                       114075
## 9                             0                            0
## 10                            0                            0
## 12                         2349                         2264
##    TCGA.CZ.5462.11A.01R.1503.07 TCGA.CZ.5463.11A.01R.1503.07
## 1                           109                           84
## 2                        107009                        94839
## 9                             0                            0
## 10                            0                            0
## 12                         3000                         2682
##    TCGA.CZ.5465.11A.01R.1503.07 TCGA.CZ.5467.11A.01R.1503.07
## 1                           137                          138
## 2                         71648                        96987
## 9                             0                            0
## 10                            0                            0
## 12                         2802                         1589
##    TCGA.CZ.5468.11A.01R.1503.07 TCGA.CZ.5469.11A.01R.1503.07
## 1                           156                          109
## 2                        115785                        95492
## 9                             0                            0
## 10                            0                            0
## 12                         2083                         2016
##    TCGA.CZ.5470.11A.01R.1503.07 TCGA.CZ.5984.11A.01R.1672.07
## 1                           133                           77
## 2                        175437                       103163
## 9                             0                            0
## 10                            0                            0
## 12                         2906                         1845
## 20110 more rows ...
## 
## $samples
##                              group lib.size norm.factors
## TCGA.B0.4712.01A.01R.1503.07     1 49725026            1
## TCGA.B0.5402.01A.01R.1503.07     1 62640283            1
## TCGA.B0.5690.01A.11R.1541.07     1 45633765            1
## TCGA.B0.5705.01A.11R.1541.07     1 60479323            1
## TCGA.B0.5709.01A.11R.1541.07     1 58992784            1
## 71 more rows ...
## 
## $genes
##     symbol txlen      txgc
## 1     A1BG  3322 0.5644190
## 2      A2M  4844 0.4882329
## 3     NAT1  2280 0.3942982
## 4     NAT2  1322 0.3895613
## 5 SERPINA3  3067 0.5249429
## 20110 more rows ...
Figure S1: Library sizes in increasing order.

Figure S1: Library sizes in increasing order.

Genes filtering: Distribution of expression levels

par(mfrow=c(1,2), mar=c(4,5,1,1))

Expression levels comparison between all samples

In the next part, we take a look on the distribution of expression values per sample in terms of logarithmic CPM units.

multidensity(as.list(as.data.frame(assays(se.paired)$logCPM)), xlab = "log2 CPM", legend = NULL, main = "All samples", cex.axis = 1.2, cex.lab = 1.5, las = 1)

We cannot observe significant diferences between samples in terms of expression levels.

As we have more than 200 samples, we now display the normal and tumor distribution separately.

Expression levels comparison between normal and tumor

normalCPM <- se.paired[,se.paired$type == "normal"]
tumorCPM <- se.paired[,se.paired$type == "tumor"]
multidensity(as.list(as.data.frame(assays(normalCPM)$logCPM)), xlab = "log2 CPM", legend = NULL, main = "Normal", cex.axis = 1.2, cex.lab = 1.5, las = 1)

multidensity(as.list(as.data.frame(assays(tumorCPM)$logCPM)), xlab = "log2 CPM", legend = NULL, main = "Tumor", cex.axis = 1.2, cex.lab = 1.5, las = 1)

Distribution of expression levels among genes

Now, we want to filter out the genes with a low expression. To do that, we can plot the CPMs in logarithmic scale.

genemean <- data.frame(Means=rowMeans(assays(se.paired)$logCPM[,-1]))

ggplot(genemean) +
  geom_bar(aes(x=Means), binwidth = 1, fill="white", color="black") +
  theme_bw() + xlab("\nlog2CPM") + ylab("Count\n") +
  geom_vline(xintercept=mean(genemean$Means), color="red")
## Warning: `geom_bar()` no longer has a `binwidth` parameter. Please use
## `geom_histogram()` instead.

We can see how there are two peaks of genes. The first one correspond to these genes with a very low expression and thus we decide to remove them. In other to do that, we used the mean as a threshold.

avgexp <- rowMeans(assays(se.paired)$logCPM)
mask <- avgexp > mean(genemean$Means)

These are the numbers of genes before filtering:

dim(se.paired) # Before filtering SE
## [1] 20115    76
dim(dge.filt) # Before filtering DGE
## [1] 20115    76

These are the numbers of samples genes after filtering:

se.paired.genes <- se.paired[mask, ]
dim(se.paired.genes) # After filtering SE
## [1] 11413    76
dge.filt <- dge.filt[mask, ]
dim(dge.filt) # After filtering DGE
## [1] 11413    76

At this point, we can calculate the normaliation factors on the filtered expression data set.

dgenorm <- calcNormFactors(dge.filt)
assays(se.paired.genes)$logCPMnorm <- cpm(dgenorm, log=TRUE, prior.count=0.5)

MA-plots

We look at the MA-plots of the normal samples to see if there are systematic biases in gene expression levels in any of the sampes. We expect the slope to be around zero.

Figure S4: MA-plots of the normal samples.

Figure S4: MA-plots of the normal samples.

Figure S4: MA-plots of the normal samples.

Figure S4: MA-plots of the normal samples.

As we can see, we don’t see any samples with major biases in gene expression.

Now, we examine the tumor samples:

Figure S4: MA-plots of the tumor samples.

Figure S4: MA-plots of the tumor samples.


Batch identification

Now we’re going to analyze our data in order to search for batch effect that could interfere with the biological signal. First, we analyze some of the information contained in the barcode, such as tissue source site, center of sequenciation, plate and portion and analyte combinations. We use two approaches to try to identify batch effect, the Hierarchical Clustering and a Multidimensional Scaling plot.

tss <- substr(colnames(se.paired.genes), 6, 7)
table(tss)
## tss
## B0 B2 B8 CJ CW CZ 
## 14  2  4  4 16 36
center <- substr(colnames(se.paired.genes), 27, 28)
table(center)
## center
## 07 
## 76
plate <- substr(colnames(se.paired.genes), 22, 25)
table(plate)
## plate
## 1503 1541 1672 
##   38   20   18
portionanalyte <- substr(colnames(se.paired.genes), 18, 20)
table(portionanalyte)
## portionanalyte
## 01R 02R 11R 
##  60   4  12
samplevial <- substr(colnames(se.paired.genes), 14, 16)
table(samplevial)
## samplevial
## 01A 01B 11A 
##  37   1  38


GENDER batch effect

table(data.frame(TYPE=se.paired.genes$type, GENDER=se.paired.genes$gender))
##         GENDER
## TYPE     FEMALE MALE
##   normal     13   25
##   tumor      13   25
#logCPM <- cpm(dgenorm, log=TRUE, prior.count=3)
d <- as.dist(1-cor(assays(se.paired.genes)$logCPMnorm, method="spearman"))
sampleClustering <- hclust(d)
batch <- as.integer(factor(se.paired.genes$gender))
sampleDendrogram <- as.dendrogram(sampleClustering, hang=0.1)
names(batch) <- colnames(se.paired.genes)
outcome <- paste(substr(colnames(se.paired.genes), 9, 12), as.character(se.paired.genes$type), sep="-")
names(outcome) <- colnames(se.paired.genes)
sampleDendrogram <- dendrapply(sampleDendrogram,
                               function(x, batch, labels) {
                                 if (is.leaf(x)) {
                                   attr(x, "nodePar") <- list(lab.col=as.vector(batch[attr(x, "label")]))
                                   attr(x, "label") <- as.vector(labels[attr(x, "label")])
                                 }
                                 x
                               }, batch, outcome)
plot(sampleDendrogram, main="Hierarchical clustering of samples")
legend("topright", paste("TSS", sort(unique(batch)), levels(factor(se.paired.genes$gender))), fill=sort(unique(batch)))
Figure S6: Hierarchical clustering of the samples.

Figure S6: Hierarchical clustering of the samples.

plotMDS(dgenorm, labels=outcome, col=batch)
legend("bottomleft", paste("Gender", sort(unique(batch)), levels(factor(se.paired.genes$gender))),
       fill=sort(unique(batch)), inset=0.05)
Figure S7: Multidimensional scaling plot of the samples.

Figure S7: Multidimensional scaling plot of the samples.

TSS batch effect

table(data.frame(TYPE=se.paired.genes$type, TSS=tss))
##         TSS
## TYPE     B0 B2 B8 CJ CW CZ
##   normal  7  1  2  2  8 18
##   tumor   7  1  2  2  8 18
#logCPM <- cpm(dgenorm, log=TRUE, prior.count=3)
d <- as.dist(1-cor(assays(se.paired.genes)$logCPMnorm, method="spearman"))
sampleClustering <- hclust(d)
batch <- as.integer(factor(tss))
sampleDendrogram <- as.dendrogram(sampleClustering, hang=0.1)
names(batch) <- colnames(se.paired.genes)
outcome <- paste(substr(colnames(se.paired.genes), 9, 12), as.character(se.paired.genes$type), sep="-")
names(outcome) <- colnames(se.paired.genes)
sampleDendrogram <- dendrapply(sampleDendrogram,
                               function(x, batch, labels) {
                                 if (is.leaf(x)) {
                                   attr(x, "nodePar") <- list(lab.col=as.vector(batch[attr(x, "label")]))
                                   attr(x, "label") <- as.vector(labels[attr(x, "label")])
                                 }
                                 x
                               }, batch, outcome)
plot(sampleDendrogram, main="Hierarchical clustering of samples")
legend("topright", paste("TSS", sort(unique(batch)), levels(factor(tss))), fill=sort(unique(batch)))
Figure S6: Hierarchical clustering of the samples.

Figure S6: Hierarchical clustering of the samples.

plotMDS(dgenorm, labels=outcome, col=batch)
legend("bottomleft", paste("TSS", sort(unique(batch)), levels(factor(tss))),
       fill=sort(unique(batch)), inset=0.05)
Figure S7: Multidimensional scaling plot of the samples.

Figure S7: Multidimensional scaling plot of the samples.


PLATE batch effect

table(data.frame(TYPE=se.paired.genes$type, PLATE=plate))
##         PLATE
## TYPE     1503 1541 1672
##   normal   19   10    9
##   tumor    19   10    9
#logCPM <- cpm(dgenorm, log=TRUE, prior.count=3)
d <- as.dist(1-cor(assays(se.paired.genes)$logCPMnorm, method="spearman"))
sampleClustering <- hclust(d)
batch <- as.integer(factor(plate))
sampleDendrogram <- as.dendrogram(sampleClustering, hang=0.1)
names(batch) <- colnames(se.paired.genes)
outcome <- paste(substr(colnames(se.paired.genes), 9, 12), as.character(se.paired.genes$type), sep="-")
names(outcome) <- colnames(se.paired.genes)
sampleDendrogram <- dendrapply(sampleDendrogram,
                               function(x, batch, labels) {
                                 if (is.leaf(x)) {
                                   attr(x, "nodePar") <- list(lab.col=as.vector(batch[attr(x, "label")]))
                                   attr(x, "label") <- as.vector(labels[attr(x, "label")])
                                 }
                                 x
                               }, batch, outcome)
plot(sampleDendrogram, main="Hierarchical clustering of samples")
legend("topright", paste("PLATE", sort(unique(batch)), levels(factor(plate))), fill=sort(unique(batch)))
Figure S6: Hierarchical clustering of the samples.

Figure S6: Hierarchical clustering of the samples.

plotMDS(dgenorm, labels=outcome, col=batch)
legend("bottomleft", paste("plate", sort(unique(batch)), levels(factor(plate))),
       fill=sort(unique(batch)), inset=0.05)
Figure S7: Multidimensional scaling plot of the samples.

Figure S7: Multidimensional scaling plot of the samples.


PORTION-ANALYTE batch effect

table(data.frame(TYPE=se.paired.genes$type, P.analyte = portionanalyte))
##         P.analyte
## TYPE     01R 02R 11R
##   normal  36   2   0
##   tumor   24   2  12
#logCPM <- cpm(dgenorm, log=TRUE, prior.count=3)
d <- as.dist(1-cor(assays(se.paired.genes)$logCPMnorm, method="spearman"))
sampleClustering <- hclust(d)
batch <- as.integer(factor(portionanalyte))
sampleDendrogram <- as.dendrogram(sampleClustering, hang=0.1)
names(batch) <- colnames(se.paired.genes)
outcome <- paste(substr(colnames(se.paired.genes), 9, 12), as.character(se.paired.genes$type), sep="-")
names(outcome) <- colnames(se.paired.genes)
sampleDendrogram <- dendrapply(sampleDendrogram,
                               function(x, batch, labels) {
                                 if (is.leaf(x)) {
                                   attr(x, "nodePar") <- list(lab.col=as.vector(batch[attr(x, "label")]))
                                   attr(x, "label") <- as.vector(labels[attr(x, "label")])
                                 }
                                 x
                               }, batch, outcome)
plot(sampleDendrogram, main="Hierarchical clustering of samples")
legend("topright", paste("Portionanalyte", sort(unique(batch)), levels(factor(portionanalyte))), fill=sort(unique(batch)))
Figure S6: Hierarchical clustering of the samples.

Figure S6: Hierarchical clustering of the samples.

plotMDS(dgenorm, labels=outcome, col=batch)
legend("bottomleft", paste("Portionanalyte", sort(unique(batch)), levels(factor(portionanalyte))),
       fill=sort(unique(batch)), inset=0.05)
Figure S7: Multidimensional scaling plot of the samples.

Figure S7: Multidimensional scaling plot of the samples.

DIFFERENTIAL EXPRESSION ANALYSIS

MEAN-VARIANCE RELATIONSHIP

design <- model.matrix(~type + samplecodes, data = colData(se.paired.genes))
summary(design)
##   (Intercept)   typetumor   samplecodes4863   samplecodes4864  
##  Min.   :1    Min.   :0.0   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:1    1st Qu.:0.0   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :1    Median :0.5   Median :0.00000   Median :0.00000  
##  Mean   :1    Mean   :0.5   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:1    3rd Qu.:1.0   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1    Max.   :1.0   Max.   :1.00000   Max.   :1.00000  
##  samplecodes4865   samplecodes5402   samplecodes5452   samplecodes5453  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5454   samplecodes5455   samplecodes5457   samplecodes5458  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5461   samplecodes5462   samplecodes5463   samplecodes5465  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5467   samplecodes5468   samplecodes5469   samplecodes5470  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5549   samplecodes5552   samplecodes5580   samplecodes5585  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5587   samplecodes5589   samplecodes5591   samplecodes5636  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5680   samplecodes5690   samplecodes5705   samplecodes5709  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes5711   samplecodes5712   samplecodes5984   samplecodes6030  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##  samplecodes6087   samplecodes6088   samplecodes6090  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02632   Mean   :0.02632   Mean   :0.02632  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000
dgenorm
## An object of class "DGEList"
## $counts
##    TCGA.B0.4712.01A.01R.1503.07 TCGA.B0.5402.01A.01R.1503.07
## 2                         72089                       254994
## 12                         1488                         1894
## 14                         8130                        13499
## 16                        15743                         8655
## 18                          285                         5697
##    TCGA.B0.5690.01A.11R.1541.07 TCGA.B0.5705.01A.11R.1541.07
## 2                        214306                       141888
## 12                         1949                         2848
## 14                         5934                        10251
## 16                         7642                        12714
## 18                          821                         6150
##    TCGA.B0.5709.01A.11R.1541.07 TCGA.B0.5711.01A.11R.1672.07
## 2                        212776                       193205
## 12                         2491                         2067
## 14                         8520                         6996
## 16                        11427                         8106
## 18                         2424                          781
##    TCGA.B0.5712.01A.11R.1672.07 TCGA.B2.5636.01A.02R.1541.07
## 2                         58923                       162102
## 12                         1731                         2464
## 14                         9469                         5671
## 16                         8175                        12293
## 18                         2964                          483
##    TCGA.B8.5549.01A.01R.1541.07 TCGA.B8.5552.01B.11R.1672.07
## 2                        148006                       250803
## 12                         1593                         2740
## 14                         4861                         5906
## 16                        12127                         9849
## 18                          880                          692
##    TCGA.CJ.5680.01A.11R.1541.07 TCGA.CJ.6030.01A.11R.1672.07
## 2                         74680                       119037
## 12                         2291                         1876
## 14                         9380                         9306
## 16                        15652                         9259
## 18                         7623                         1214
##    TCGA.CW.5580.01A.01R.1672.07 TCGA.CW.5585.01A.01R.1541.07
## 2                        214475                       137990
## 12                         3628                         2699
## 14                         8573                         9558
## 16                        10712                         8662
## 18                          871                         1005
##    TCGA.CW.5587.01A.01R.1541.07 TCGA.CW.5589.01A.01R.1541.07
## 2                        147003                       161231
## 12                         2481                         1940
## 14                        10829                         6297
## 16                        11349                         7001
## 18                         3171                         3062
##    TCGA.CW.5591.01A.01R.1541.07 TCGA.CW.6087.01A.11R.1672.07
## 2                        141961                        44768
## 12                         1799                         2399
## 14                         4985                         7436
## 16                         9385                        21165
## 18                         5870                         1801
##    TCGA.CW.6088.01A.11R.1672.07 TCGA.CW.6090.01A.11R.1672.07
## 2                        200085                       120305
## 12                         3484                         1952
## 14                         7859                         8142
## 16                        10274                         8952
## 18                         4186                          608
##    TCGA.CZ.4863.01A.01R.1503.07 TCGA.CZ.4864.01A.01R.1503.07
## 2                         80757                       121483
## 12                         2495                         3438
## 14                         8369                         9499
## 16                        10205                        11632
## 18                         1810                         1263
##    TCGA.CZ.4865.01A.02R.1503.07 TCGA.CZ.5452.01A.01R.1503.07
## 2                        196075                        90934
## 12                         2491                         1814
## 14                        11115                         7968
## 16                        12748                         9549
## 18                         1245                         1826
##    TCGA.CZ.5453.01A.01R.1503.07 TCGA.CZ.5454.01A.01R.1503.07
## 2                        122695                       190754
## 12                         1631                         1745
## 14                         5293                         8773
## 16                        12352                         9966
## 18                         4484                          626
##    TCGA.CZ.5455.01A.01R.1503.07 TCGA.CZ.5457.01A.01R.1503.07
## 2                        277893                       130344
## 12                         2907                         1429
## 14                         9900                         9737
## 16                        13560                        15042
## 18                         1230                         3262
##    TCGA.CZ.5458.01A.01R.1503.07 TCGA.CZ.5461.01A.01R.1503.07
## 2                        210354                       183064
## 12                         2061                         2368
## 14                         8476                        13330
## 16                         8138                        12106
## 18                          618                          504
##    TCGA.CZ.5462.01A.01R.1503.07 TCGA.CZ.5463.01A.01R.1503.07
## 2                         53290                       160439
## 12                         2032                         3366
## 14                         8986                        11367
## 16                        13632                        12872
## 18                         1178                         2910
##    TCGA.CZ.5465.01A.01R.1503.07 TCGA.CZ.5467.01A.01R.1503.07
## 2                        151711                       187303
## 12                         3780                         2391
## 14                        13893                         7658
## 16                        22800                        12269
## 18                          507                          676
##    TCGA.CZ.5468.01A.01R.1503.07 TCGA.CZ.5469.01A.01R.1503.07
## 2                         84264                        48436
## 12                         1944                         2420
## 14                         5298                         7583
## 16                        22086                         7607
## 18                          154                          464
##    TCGA.CZ.5470.01A.01R.1503.07 TCGA.CZ.5984.01A.11R.1672.07
## 2                        101134                       107521
## 12                         1860                         2169
## 14                         9693                         8567
## 16                         9597                        11801
## 18                          895                         1526
##    TCGA.B0.4712.11A.02R.1503.07 TCGA.B0.5402.11A.01R.1503.07
## 2                         53444                       212158
## 12                         2611                         1831
## 14                         9926                         7167
## 16                        12248                        11301
## 18                         1700                         9533
##    TCGA.B0.5690.11A.01R.1541.07 TCGA.B0.5705.11A.01R.1541.07
## 2                         44253                        94956
## 12                         2159                         2375
## 14                         8329                         6731
## 16                        12061                        13426
## 18                        19929                         7635
##    TCGA.B0.5709.11A.01R.1541.07 TCGA.B0.5711.11A.01R.1672.07
## 2                         61381                        81181
## 12                         2074                         2678
## 14                         7668                         8945
## 16                        11075                        12953
## 18                        25920                        18655
##    TCGA.B0.5712.11A.01R.1672.07 TCGA.B2.5636.11A.01R.1541.07
## 2                        165423                        88791
## 12                         1845                         2033
## 14                         6498                         6647
## 16                         9655                        11014
## 18                         3171                        24671
##    TCGA.B8.5549.11A.01R.1541.07 TCGA.B8.5552.11A.01R.1672.07
## 2                         68264                       122109
## 12                         2297                         1671
## 14                         7042                         4443
## 16                        11353                         7354
## 18                         1438                         1815
##    TCGA.CJ.5680.11A.01R.1541.07 TCGA.CJ.6030.11A.01R.1672.07
## 2                         85768                        66273
## 12                         2094                         1644
## 14                         5705                        10338
## 16                         9369                        14308
## 18                         4989                        14743
##    TCGA.CW.5580.11A.02R.1672.07 TCGA.CW.5585.11A.01R.1541.07
## 2                        162520                       111561
## 12                         2849                         2009
## 14                         9621                         8669
## 16                        12294                        12937
## 18                         8742                        28314
##    TCGA.CW.5587.11A.01R.1541.07 TCGA.CW.5589.11A.01R.1541.07
## 2                         57992                        58145
## 12                         2005                         1909
## 14                         7388                         8045
## 16                        13056                        11578
## 18                        28450                        32322
##    TCGA.CW.5591.11A.01R.1541.07 TCGA.CW.6087.11A.01R.1672.07
## 2                        268276                        50936
## 12                         3544                         2215
## 14                         9656                         7374
## 16                         8113                        13435
## 18                          474                        11368
##    TCGA.CW.6088.11A.01R.1672.07 TCGA.CW.6090.11A.01R.1672.07
## 2                        124218                        54273
## 12                         3581                         2060
## 14                        10869                         6421
## 16                        14779                        12600
## 18                         2032                        44607
##    TCGA.CZ.4863.11A.01R.1503.07 TCGA.CZ.4864.11A.01R.1503.07
## 2                        100202                       100861
## 12                         2499                         2015
## 14                        12234                         8487
## 16                        13471                        13555
## 18                         1854                        36257
##    TCGA.CZ.4865.11A.01R.1503.07 TCGA.CZ.5452.11A.01R.1503.07
## 2                        133636                        57375
## 12                         2079                         2036
## 14                         8473                         9704
## 16                         8622                        13255
## 18                         2164                         1945
##    TCGA.CZ.5453.11A.01R.1503.07 TCGA.CZ.5454.11A.01R.1503.07
## 2                        129106                       160618
## 12                         2225                         3201
## 14                         9980                        12174
## 16                        13556                        19838
## 18                         3538                         3821
##    TCGA.CZ.5455.11A.01R.1503.07 TCGA.CZ.5457.11A.01R.1503.07
## 2                        132123                        91025
## 12                         2145                         2274
## 14                        10933                        11980
## 16                        12748                        13296
## 18                         1221                          744
##    TCGA.CZ.5458.11A.01R.1503.07 TCGA.CZ.5461.11A.01R.1503.07
## 2                        125882                       114075
## 12                         2349                         2264
## 14                        14544                         8929
## 16                        18502                        13896
## 18                         2986                        10012
##    TCGA.CZ.5462.11A.01R.1503.07 TCGA.CZ.5463.11A.01R.1503.07
## 2                        107009                        94839
## 12                         3000                         2682
## 14                        14048                         8824
## 16                        20243                        12365
## 18                         3885                         6959
##    TCGA.CZ.5465.11A.01R.1503.07 TCGA.CZ.5467.11A.01R.1503.07
## 2                         71648                        96987
## 12                         2802                         1589
## 14                        10720                         8978
## 16                        15365                        12244
## 18                         1488                         2067
##    TCGA.CZ.5468.11A.01R.1503.07 TCGA.CZ.5469.11A.01R.1503.07
## 2                        115785                        95492
## 12                         2083                         2016
## 14                         9351                        11212
## 16                        12508                        13376
## 18                         1471                         1760
##    TCGA.CZ.5470.11A.01R.1503.07 TCGA.CZ.5984.11A.01R.1672.07
## 2                        175437                       103163
## 12                         2906                         1845
## 14                        12262                         6737
## 16                        17086                         9896
## 18                         2878                        20253
## 11408 more rows ...
## 
## $samples
##                              group lib.size norm.factors
## TCGA.B0.4712.01A.01R.1503.07     1 49725026    0.8058921
## TCGA.B0.5402.01A.01R.1503.07     1 62640283    0.9852068
## TCGA.B0.5690.01A.11R.1541.07     1 45633765    0.9727027
## TCGA.B0.5705.01A.11R.1541.07     1 60479323    0.9851701
## TCGA.B0.5709.01A.11R.1541.07     1 58992784    0.9797455
## 71 more rows ...
## 
## $genes
##      symbol txlen      txgc
## 2       A2M  4844 0.4882329
## 5  SERPINA3  3067 0.5249429
## 7      AAMP  2270 0.5792952
## 9      AARS  3477 0.5349439
## 10     ABAT  5586 0.4994629
## 11408 more rows ...
v <- voom(dgenorm, design, plot=FALSE)
FDRcutoff <- 0.01
mod0 <- model.matrix(~ 1, colData(se.paired.genes))
sv <- sva(v$E, mod = design, mod0 = mod0)
## Number of significant surrogate variables is:  7 
## Iteration (out of 5 ):1  2  3  4  5
design.voom <- cbind(design, sv$sv)


fit <- lmFit(v, design)
fit <- eBayes(fit)
res <- decideTests(fit, p.value = FDRcutoff)
summary(res)
##    (Intercept) typetumor samplecodes4863 samplecodes4864 samplecodes4865
## -1           0      3473               4               2               8
## 0          891      4203           11408           11410           11405
## 1        10522      3737               1               1               0
##    samplecodes5402 samplecodes5452 samplecodes5453 samplecodes5454
## -1               3               1               2               2
## 0            11410           11412           11411           11410
## 1                0               0               0               1
##    samplecodes5455 samplecodes5457 samplecodes5458 samplecodes5461
## -1               0               3               1               2
## 0            11413           11408           11412           11411
## 1                0               2               0               0
##    samplecodes5462 samplecodes5463 samplecodes5465 samplecodes5467
## -1               1               0               5               4
## 0            11411           11413           11407           11409
## 1                1               0               1               0
##    samplecodes5468 samplecodes5469 samplecodes5470 samplecodes5549
## -1               1               1               0               1
## 0            11411           11411           11413           11412
## 1                1               1               0               0
##    samplecodes5552 samplecodes5580 samplecodes5585 samplecodes5587
## -1               8               5               1               3
## 0            11404           11406           11412           11408
## 1                1               2               0               2
##    samplecodes5589 samplecodes5591 samplecodes5636 samplecodes5680
## -1               1               1               0               5
## 0            11412           11411           11413           11408
## 1                0               1               0               0
##    samplecodes5690 samplecodes5705 samplecodes5709 samplecodes5711
## -1               5               5               5               1
## 0            11408           11407           11408           11412
## 1                0               1               0               0
##    samplecodes5712 samplecodes5984 samplecodes6030 samplecodes6087
## -1               6               1               1               3
## 0            11405           11411           11412           11408
## 1                2               1               0               2
##    samplecodes6088 samplecodes6090
## -1               1               2
## 0            11412           11411
## 1                0               0
volcanoplot(fit, coef = 2, highlight = 7, fit$genes$symbol, main = "Model", las = 1)

colnames(design)
##  [1] "(Intercept)"     "typetumor"       "samplecodes4863"
##  [4] "samplecodes4864" "samplecodes4865" "samplecodes5402"
##  [7] "samplecodes5452" "samplecodes5453" "samplecodes5454"
## [10] "samplecodes5455" "samplecodes5457" "samplecodes5458"
## [13] "samplecodes5461" "samplecodes5462" "samplecodes5463"
## [16] "samplecodes5465" "samplecodes5467" "samplecodes5468"
## [19] "samplecodes5469" "samplecodes5470" "samplecodes5549"
## [22] "samplecodes5552" "samplecodes5580" "samplecodes5585"
## [25] "samplecodes5587" "samplecodes5589" "samplecodes5591"
## [28] "samplecodes5636" "samplecodes5680" "samplecodes5690"
## [31] "samplecodes5705" "samplecodes5709" "samplecodes5711"
## [34] "samplecodes5712" "samplecodes5984" "samplecodes6030"
## [37] "samplecodes6087" "samplecodes6088" "samplecodes6090"
toptable <- topTable(fit, coef = 2, n=Inf)
par(mfrow=c(1,2), mar=c(4,5,2,2))
hist(toptable$P.Value, xlab="Raw P-values", main = "", las = 1)
qqt(fit$t[,2], df=fit$df.prior + fit$df.residual, main="", pch = ".", cex=3, ylim = c(-50, 50))
abline(0,1, lwd = 2)

REPEATED MEASUREMENTS

se.paired.genes
## class: RangedSummarizedExperiment 
## dim: 11413 76 
## metadata(5): experimentData annotation cancerTypeCode
##   cancerTypeDescription objectCreationDate
## assays(3): counts logCPM logCPMnorm
## rownames(11413): 2 12 ... 101340251 101340252
## rowRanges metadata column names(3): symbol txlen txgc
## colnames(76): TCGA.B0.4712.01A.01R.1503.07
##   TCGA.B0.5402.01A.01R.1503.07 ... TCGA.CZ.5470.11A.01R.1503.07
##   TCGA.CZ.5984.11A.01R.1672.07
## colData names(550): type bcr_patient_uuid ...
##   lymph_nodes_aortic_pos_total samplecodes